1 loading libraries

library(limma)
library(WGCNA)
library(edgeR)
library(org.Hs.eg.db)
library(biomaRt)
library(ggplot2)
library(clusterProfiler)
library(ComplexHeatmap)
library(igraph)
library(networkD3)
library(ggpubr)
library(stringr)

2 Importing data

#Read GSE39612 data ============================================
edat<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/renormalized_samples_edat_no_celllines.RDS")
sample_list_renormalized<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/renormalized_samples_list_no_celllines.RDS")
GSE39612_annotation<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/GSE39612_annotation.RDS")

3 Performing DEG analysis on normal skin samples and MCC tumor samples

#DEGs for normal vs. MCC tumor samples ============================================
group<-factor(c(rep("normal", 64),  rep("tumor", 30)))
names(group)<-sample_list_renormalized$ID
design = model.matrix(~0 + group)
comparison = "grouptumor-groupnormal"
cont=makeContrasts(comparison,levels=design)
fit=lmFit(edat,design)
fit=contrasts.fit(fit,contrasts=cont)
tfit=treat(fit)
efit=eBayes(tfit)
tdf=topTable(efit,coef=1, n = Inf, adjust = "BH")
#write.csv(tdf, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/gse39612_tumor_vs_normal_DEGs_analysis.csv")

4 Performing GO term over-representation analysis on DEGs from normal skin vs. MCC tumors comparison

gse39612_mcc.vs.normal_sig<-tdf[abs(tdf$logFC) >=1 & abs(tdf$adj.P.Va) <= 0.05,]
genes<-rownames(gse39612_mcc.vs.normal_sig)
go_term<-enrichGO(genes,
                  OrgDb = org.Hs.eg.db, 
                  ont='BP',
                  pAdjustMethod = 'BH',
                  pvalueCutoff = 0.05, 
                  qvalueCutoff = 0.25,
                  keyType = 'SYMBOL')
df<-go_term@result
write.csv(df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/GSE39612_tumor_vs_normal_GO_enrichment.csv")
#saveRDS(go_term, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/GSE39612_tumor_vs_normal_GO_enrichment_result.rds")

4.1 Plotting enriched go terms

go_term<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/GSE39612_tumor_vs_normal_GO_enrichment_result.rds")
p<-dotplot(go_term, showCategory = 20, font.size = 20, x = "Count")
print(p)

4.2 Plotting heatmap for Wnt signaling pathway genes from DEGs

wnt_signaling_genes<-strsplit(go_term@result[go_term@result$Description == "Wnt signaling pathway","geneID"][1], "/")[[1]]
wnt_signaling_genes_scaled<-t(scale(t(edat[wnt_signaling_genes, sample_list_renormalized$ID])))
wnt_signaling_genes_scaled<-na.omit(wnt_signaling_genes_scaled)

GSE39612_annotation<-GSE39612_annotation[sample_list_renormalized$ID,]
GSE39612_annotation<-GSE39612_annotation[,c("tissue:ch1", "merkel cell polyomavirus status (dna and rna):ch1", "metastasis, primary, or cell line:ch1")]
colnames(GSE39612_annotation)<-c("group", "MCPyV_status", "MCC_status")
GSE39612_annotation$MCPyV_status[!GSE39612_annotation$MCPyV_status %in% c("negative", "positive")]<-"not_available"
GSE39612_annotation$MCC_status[!GSE39612_annotation$MCC_status %in% c("primary tumor", "metastatic tumor")]<-"not_available"

annotation_GSE39612_WNT = HeatmapAnnotation(
  group = GSE39612_annotation$group,
  MCC_status = GSE39612_annotation$MCC_status,
  MCPyV_status = GSE39612_annotation$MCPyV_status,
  col = list(group= c(`normal skin` = "#B7C9F3", `Merkel cell carcinoma` = "#A4312A"),
             MCC_status = c(`not_available` = "#C9CFD0", `metastatic tumor` = "#C47BFC", `primary tumor` = "#01BDC2"),
             MCPyV_status = c(`not_available` = "#C9CFD0", `negative` = "#CCFF99", `positive` = "#FF8000")
  ),
  simple_anno_size = unit(4, "mm"),
  annotation_name_gp= gpar(fontsize = 10, fontface = "bold", col = "black"))

group_column_GSE39612 = factor(GSE39612_annotation$group, levels = c("normal skin", "Merkel cell carcinoma"))

Heatmap(wnt_signaling_genes_scaled,
        heatmap_legend_param = list(title = "Z-scored", title_position = "topleft", legend_height = unit(8, "cm"), legend_direction = "vertical"),
        row_title = "genes",
        cluster_columns = cluster_within_group(wnt_signaling_genes_scaled, group_column_GSE39612),
        column_dend_side = "top",
        show_column_dend = T,
        column_title = "GSE39612 samples",
        show_column_names = F,
        column_title_side = "top",
        column_title_gp = gpar(fontsize = 18, fontface = "bold", col="black"),
        row_names_gp = gpar(fontsize = 10, fontface = "bold"),
        top_annotation = annotation_GSE39612_WNT,
        width = ncol(wnt_signaling_genes_scaled)*unit(3, "mm"), 
        height = nrow(wnt_signaling_genes_scaled)*unit(3, "mm"),
        border = T
)

5 Performing WGCNA on MCC tumor samples

nettype = "signed"
powers = c(c(1:10), seq(from = 12, to=20, by=2))

MCC_expression_data<-edat[, rownames(GSE39612_annotation[GSE39612_annotation$group=="Merkel cell carcinoma",])]

sft_MCC=pickSoftThreshold(t(MCC_expression_data), dataIsExpr = TRUE, powerVector = powers, networkType = nettype, verbose = 5)

# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft_MCC$fitIndices[,1], -sign(sft_MCC$fitIndices[,3])*sft_MCC$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab= paste0("Scale Free Topology Model Fit ", nettype, " R^2"), type="n",
     main = paste("Scale independence"))+text(sft_MCC$fitIndices[,1], -sign(sft_MCC$fitIndices[,3])*sft_MCC$fitIndices[,2],
     labels=powers,cex=1,col="red")+abline(h=0.90,col="red")

# Mean connectivity as a function of the soft-thresholding power
plot(sft_MCC$fitIndices[,1], sft_MCC$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))+text(sft_MCC$fitIndices[,1], sft_MCC$fitIndices[,5], labels=powers, cex=1, col="red")

##Create co-expression matrix=========================
cor <- WGCNA::cor
net_MCC = blockwiseModules(
  t(MCC_expression_data),
  power = 16 , #https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html Q#6 or based on powerEstimate
  TOMType = "signed", 
  minModuleSize = 30,   # We like large modules, so we set the minimum module size relatively high
  reassignThreshold = 0, 
  mergeCutHeight = 0.25,
  numericLabels = TRUE, 
  pamRespectsDendro = FALSE,
  saveTOMs = F,
  verbose = 3
)
table(net_MCC$colors)
cor<-stats::cor
#saveRDS(net_MCC, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/WGCNA_net_mcc.rds")

5.1 Calculating co-expressed gene modules

MCC_expression_data<-edat[, rownames(GSE39612_annotation[GSE39612_annotation$group=="Merkel cell carcinoma",])]
net_MCC<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/WGCNA_net_mcc.rds")

moduleLabels = net_MCC$colors
MEs = net_MCC$MEs;

# plot Eigengene adjacency heatmap
plotEigengeneNetworks(MEs, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle = 30)

5.2 Performing GO-terms analysis on each gene module from MCC tumor samples

#genemart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#saveRDS(genemart, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/genemart.rds")

#1.Organizing genes in each WGCNA module for GO-term analysis
genemart<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/genemart.rds")
probes = colnames(t(MCC_expression_data))
g_list<-getBM(filters = "hgnc_symbol", 
                       attributes= c("ensembl_gene_id","hgnc_symbol", "entrezgene_id"),values=probes,mart= genemart)
gene_list<-list()
for (i in 1:length(unique(moduleLabels))){
  number<-unique(moduleLabels)[i]
  module_name<-paste0("Module_", number)
  modnumber<-probes[moduleLabels == number]
  df_genes<-g_list[g_list$hgnc_symbol %in% modnumber,"ensembl_gene_id"]
  gene_list[[module_name]]<-df_genes
}

gene_df<-list()
module_name_list<-paste0("Module_", c(1:14))
for (i in 1:14){
  module_name<-module_name_list[i] #remove Module_0, since it's the module with genes that do not co-express with others
  genes<-gene_list[[module_name]]
  df_genes<-g_list[g_list$ensembl_gene_id %in% genes,]
  df_genes<-df_genes[!duplicated(df_genes$hgnc_symbol),]
  gene_df[[module_name]]<-df_genes
}
#2. Performing GO-term over-representation analysis on genes in each module
WGCNA_modules_go_result<-list()
for (i in 1:length(gene_df)){
  module_name<-names(gene_df)[i]
  genes<-gene_df[[module_name]]
  genes<-genes$hgnc_symbol
  em_ER_BP<-enrichGO(genes, 
                     OrgDb = org.Hs.eg.db, 
                     ont = "BP",
                   pAdjustMethod = "BH", 
                     pvalueCutoff = 0.05,
                     qvalueCutoff = 0.25, 
                     keyType = 'SYMBOL')
  go_terms<-em_ER_BP@result
  go_terms<-go_terms[go_terms$p.adjust <= 0.05,]
  go_terms<-go_terms[order(go_terms$Count, decreasing = T),]
  print(module_name)
 WGCNA_modules_go_result[[module_name]]<-em_ER_BP@result
}
#saveRDS(WGCNA_modules_go_result, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/GSE39612_WGCNA_modules_go_result.RDS")

5.3 Performing network centrality analysis and network visualization of gene modules from WGCNA

adjMatrix <- adjacency(t(MCC_expression_data), power = 16, type = "signed")
TOM = TOMsimilarity(adjMatrix)
#saveRDS(TOM, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/WGCNA_TOM.rds")
TOM = readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/gse39612/GSE39612_WGCNA_TOM.rds")
probes = colnames(t(MCC_expression_data))
dimnames(TOM)<-list(probes, probes)

# 1.Selecting top hub genes from each gene module
network_hub_list<-list()
nTOM = 40 #top nodes from the WGCNA network
for (i in 1:length(gene_df)){
  module<-names(gene_df)[i]
  genes<-gene_df[[module]][,"hgnc_symbol"]
  m<-TOM[genes, genes]
  hubs<-names(sort(rowSums(m), decreasing = T)[1:nTOM])
  hubs<-na.omit(hubs)
  network_hub_list[[module]]<-hubs
}

network_hub_list # The top 40 hubs in each module
## $Module_1
##  [1] "RO60"     "MED17"    "ANAPC7"   "MSH6"     "ZNF627"   "DNAAF2"  
##  [7] "PA2G4"    "NIF3L1"   "TRMT61B"  "TAF11"    "YEATS4"   "ENOPH1"  
## [13] "CCT6A"    "MSH2"     "SMARCA5"  "PRPF4B"   "METAP2"   "NFYB"    
## [19] "MAPKAPK5" "LARP7"    "NARS2"    "IREB2"    "YAE1"     "CDK4"    
## [25] "PSMD12"   "RBM12"    "SF3B6"    "AIMP1"    "TADA1"    "RPRD1A"  
## [31] "NUP42"    "SNW1"     "DDX18"    "PRPF40A"  "FBXW2"    "ASF1A"   
## [37] "PRELID3B" "HAUS1"    "PCMT1"    "SAE1"    
## 
## $Module_2
##  [1] "LINC00508"   "ST3GAL3"     "OR7E24"      "GTF2IP12"    "LINC00919"  
##  [6] "CLMAT3"      "LINC01844"   "ZNF527"      "MEFV"        "TINAGL1"    
## [11] "NECTIN3-AS1" "TRDN-AS1"    "NAA11"       "LINC00408"   "CSMD2-AS1"  
## [16] "ALDH1B1"     "SSX1"        "CTSE"        "ANHX"        "LRRC43"     
## [21] "ANKRD20A5P"  "FAM66D"      "PIGQ"        "DNAH3"       "LINC02053"  
## [26] "TNFRSF13C"   "HCCAT5"      "CBLIF"       "DSCR4"       "SLC18A1"    
## [31] "C22orf42"    "CYP1A2"      "TRPC7"       "IFNA7"       "GPA33"      
## [36] "E2F4"        "ERVH-1"      "LINC01333"   "SLC25A2"     "TTLL2"      
## 
## $Module_3
##  [1] "CD2"      "IKZF1"    "JAK3"     "ARHGAP30" "HCLS1"    "HLA-DMB" 
##  [7] "CD3D"     "IL2RB"    "APBB1IP"  "RAC2"     "CCL5"     "RCSD1"   
## [13] "IL2RG"    "GPR171"   "HCST"     "CCR5"     "ITGAL"    "FYB1"    
## [19] "CD8A"     "CD84"     "HLA-DOA"  "TRAF3IP3" "CXCL13"   "ITK"     
## [25] "ITGB2"    "CYBB"     "CD48"     "CD4"      "GMFG"     "GIMAP1"  
## [31] "GZMA"     "NLRC3"    "NKG7"     "CD86"     "MNDA"     "CD247"   
## [37] "CD53"     "PRF1"     "EVI2A"    "SLC7A7"  
## 
## $Module_4
##  [1] "KIF22"    "MPHOSPH9" "DLGAP5"   "SSRP1"    "CKS2"     "FEN1"    
##  [7] "NCAPG"    "TIMELESS" "SPAG5"    "MELK"     "PRR11"    "CDCA5"   
## [13] "VRK1"     "LIG1"     "TRIM37"   "C19orf54" "CHAF1A"   "DTYMK"   
## [19] "DDX55"    "NDC80"    "TOP2A"    "PRPF19"   "CCNB1"    "RACGAP1" 
## [25] "PAFAH1B3" "PIN1"     "ZNF606"   "BIRC5"    "CBX2"     "EXOSC2"  
## [31] "UBTF"     "ZNF507"   "SAC3D1"   "POLR2D"   "TTK"      "PSMC3IP" 
## [37] "KIF2C"    "HJURP"    "CDCA7L"   "POLD1"   
## 
## $Module_5
##  [1] "PNLIPRP3"  "LYNX1"     "KLK10"     "ARG1"      "SERPINB13" "RNASE7"   
##  [7] "EPHX3"     "DSC1"      "IL36G"     "SERPINB7"  "LCE3D"     "TPRG1"    
## [13] "MIR205HG"  "WFDC5"     "HAL"       "FAM83C"    "KLK13"     "CYSRT1"   
## [19] "CDSN"      "DSG1"      "CYP4F22"   "DMKN"      "KLK8"      "IL20RB"   
## [25] "KLK7"      "IL36RN"    "PTK6"      "CAPNS2"    "SERPINB3"  "ALOX12B"  
## [31] "CRCT1"     "ASPRV1"    "IVL"       "ABCA12"    "SBSN"      "SERPINB4" 
## [37] "CALML3"    "AADACL2"   "GDPD3"     "BBOX1"    
## 
## $Module_6
##  [1] "LATS2"    "MMP2"     "PRRX1"    "COL6A2"   "PCOLCE"   "IFITM2"  
##  [7] "PMP22"    "FAP"      "DAB2"     "CTSK"     "CCDC80"   "CRISPLD2"
## [13] "MIR100HG" "PDGFRA"   "COPZ2"    "IL1R1"    "TMEM204"  "IGFBP4"  
## [19] "ANXA2"    "IFITM3"   "HEPH"     "FSTL1"    "COL6A3"   "ANGPTL2" 
## [25] "SERPINF1" "MMP19"    "RAB31"    "LRP1"     "GASK1B"   "OLFML3"  
## [31] "S100A10"  "MXRA5"    "PRDM1"    "CYBRD1"   "OLFML1"   "AEBP1"   
## [37] "EFEMP2"   "ANXA2P2"  "TMEM119"  "THBS2"   
## 
## $Module_7
##  [1] "ZBBX"      "OLIG2"     "LINC01561" "SHISA6"    "DRC1"      "GRIA1"    
##  [7] "MAGEA6"    "CRIP3"     "OLIG1"     "MAGEA11"   "C7orf57"   "TTC29"    
## [13] "MAGEA1"    "PALM3"     "C5orf49"   "MAGEA12"   "ROPN1L"    "MAGEA4"   
## [19] "CFAP43"    "KCNIP1"    "PDE10A"    "TBX5-AS1"  "ARMC3"     "CFAP126"  
## [25] "EZHIP"     "ZSCAN4"    "ZNF204P"   "STRC"      "SST"       "ZPBP2"    
## [31] "RSPH1"     "SPATA17"   "PACRG"     "MKX"       "C20orf85"  "PDYN"     
## [37] "TTYH1"     "CFAP46"    "EFCAB12"   "KBTBD11"  
## 
## $Module_8
##  [1] "LTN1"     "MIS18BP1" "THRAP3"   "USP47"    "OSBPL8"   "GOLIM4"  
##  [7] "NIPBL"    "WASL"     "WNK1"     "DYNC1H1"  "ATAD2"    "SYNCRIP" 
## [13] "EIF4G1"   "PRRC2C"   "ATRX"     "WASF2"    "GON4L"    "PNRC2"   
## [19] "FMNL2"    "NASP"     "SFSWAP"   "FXR1"     "PPP1R10"  "SRRM2"   
## [25] "SON"      "EHBP1"    "ILF3"     "ZBED6"    "DDX24"    "ATXN7L3B"
## [31] "SF3B1"    "UPF1"     "CCDC88A"  "LUZP1"    "SMC5"     "PPFIA1"  
## [37] "KMT2A"    "UBN2"     "TOP1"     "THOC2"   
## 
## $Module_9
##  [1] "PIGR"      "BPIFA2"    "KCNJ16"    "ODAM"      "SCGB3A1"   "LPO"      
##  [7] "MUC7"      "SLC13A5"   "SMR3B"     "DNASE2B"   "CST5"      "CXCL17"   
## [13] "SMR3A"     "PLEKHS1"   "FXYD2"     "PAX9"      "PRB4"      "BPIFB1"   
## [19] "DMBT1"     "C9orf152"  "PRB1"      "ELF5"      "TMEM213"   "CFTR"     
## [25] "ZG16B"     "FAM3D"     "TF"        "MYOC"      "SERPINA3"  "ETNPPL"   
## [31] "SCN7A"     "SOX10"     "CA6"       "FOLH1B"    "PRB3"      "GCNT3"    
## [37] "LINC01549" "NDRG2"     "C16orf89"  "LINC01482"
## 
## $Module_10
##  [1] "PRDX2"     "MZT2B"     "PPP6R2"    "ZC3H7B"    "MINDY1"    "XRCC2"    
##  [7] "ARHGEF1"   "TMEM106C"  "MEIOC"     "MTMR9"     "INTS4"     "HAUS2"    
## [13] "DIP2A"     "FAM161B"   "PODNL1"    "CCDC152"   "UNC45A"    "ZNF611"   
## [19] "UBQLN4"    "FBXW12"    "ENTPD6"    "SLF2"      "IWS1"      "LINC01692"
## [25] "NFKBID"    "ICAM4"     "PTAR1"     "C16orf92"  "WDR62"     "GVQW3"    
## [31] "EIF3C"     "BTBD18"    "FOXP1-IT1" "ARHGAP21"  "HCG18"     "GGA1"     
## [37] "GRM2"      "RAMP2-AS1" "TRIR"      "LINC01635"
## 
## $Module_11
##  [1] "LINC01777"  "OR6B1"      "IL9R"       "DKK4"       "LINC00896" 
##  [6] "LYZL4"      "LINC01364"  "DUSP21"     "PLA2G1B"    "CECR3"     
## [11] "MAB21L2"    "OR2H1"      "CST4"       "KYAT1"      "PCDHB1"    
## [16] "EDARADD"    "DEFA5"      "GPR78"      "KAAG1"      "SRARP"     
## [21] "DLGAP1-AS1" "OR5P2"      "SIM2"       "LINC02175"  "NXF5"      
## [26] "MAP3K11"    "OR1C1"      "HTR1B"      "TACR2"      "LINC01209" 
## [31] "POU6F2-AS2" "NKD1"       "ZNF486"     "OR10A4"     "SP5"       
## [36] "OLAH"       "TIMM17B"    "LINC00550"  "QPCTL"      "KIAA1328"  
## 
## $Module_12
##  [1] "CHCHD1"       "RPLP0"        "RPL6"         "PPA1"         "UBE2E1"      
##  [6] "RPL18"        "RPL10A"       "COX18"        "BLOC1S2"      "RPL11"       
## [11] "DPH5"         "RPL19"        "RPS18"        "RPS9"         "RPL8"        
## [16] "RPS16"        "RPS3"         "OXA1L"        "RPS14"        "EXOSC1"      
## [21] "RPS4X"        "CIAO2B"       "RPL30"        "RPL14"        "EPB41L4A-AS1"
## [26] "MAP1LC3B"     "RPL3"         "PABPC4"       "NOA1"         "RPL7"        
## [31] "EIF3A"        "NCOA4"        "RPL41"        "RPL35"        "LTA4H"       
## [36] "CCDC28A"      "IGBP1"        "RPL9"         "RPL36AL"      "PRDX4"       
## 
## $Module_13
##  [1] "MYH1"      "MYH2"      "CSRP3"     "MYBPC1"    "NRAP"      "MYL1"     
##  [7] "ANKRD1"    "HSFX3"     "TCAP"      "CKM"       "TTN"       "TNNC2"    
## [13] "ACTA1"     "MYBPC2"    "XIRP2"     "KLHL41"    "TNNC1"     "PYGM"     
## [19] "TNNT3"     "MYBPH"     "SYNPO2L"   "ANKRD31"   "PVALB"     "HSPB6"    
## [25] "COX6A2"    "LINC01352" "MYOT"      "CD22"      "XIRP1"     "ACTN2"    
## [31] "ARHGAP22"  "LINC00343" "VPREB3"    "CELA2B"    "AGR3"      "FAM47B"   
## [37] "TNNI2"     "CD19"      "CCDC120"   "ASB11"    
## 
## $Module_14
##  [1] "OR8D2"       "GP6"         "LINC00636"   "LINC02186"   "CCR9"       
##  [6] "CNTFR-AS1"   "SCTR"        "SLC6A2"      "ADAM7"       "AMELY"      
## [11] "TRHDE-AS1"   "SP8"         "HYAL4"       "SLC38A11"    "TSSK4"      
## [16] "LINC01180"   "PCDH15"      "LINC02458"   "LINC00862"   "MIR1915HG"  
## [21] "FGA"         "SLC30A8"     "ATP1A4"      "GNRHR"       "VN1R1"      
## [26] "MOBP"        "LINC00606"   "TMEM254-AS1" "MAGEE2"      "LINC02274"  
## [31] "CASC16"      "PRMT5-AS1"   "ZPLD1"       "TMEM171"     "CSTF3-DT"   
## [36] "FAM224A"     "OR2F2"       "C1orf100"    "TPT1-AS1"    "KIRREL3"
# 2. Generating adjacency matrix for hub genes from all modules
hub_genes<-unname(unlist(network_hub_list))
hubs_m<-TOM[hub_genes, hub_genes]
edges<-data.frame(from=rownames(hubs_m)[row(hubs_m)[upper.tri(hubs_m)]], 
           to=colnames(hubs_m)[col(hubs_m)[upper.tri(hubs_m)]], 
           value=hubs_m[upper.tri(hubs_m)])

# 3. Setting up thredshold for selecting leading edges in the network
leading_edges_ratio = 0.20 #top 1/5 edges based on edge weight.
threshold = quantile(edges$value, probs=1-leading_edges_ratio , na.rm = FALSE)
edges<-edges[edges$value >= threshold,]

# 4. Organizing vertices data frame and edges data frame for the network
vertices<-data.frame()
for(i in 1:length(network_hub_list)){
  module_name<-names(network_hub_list)[i]
  v<-data.frame(name = network_hub_list[[i]], module = rep(module_name, nTOM), size = rep(8, nTOM))
  vertices<-rbind(vertices, v)
}

vertices[,"group"]<-unlist(lapply(vertices$module, function(x) strsplit(x, "_")[[1]][2]))
vertices[,"id"]<-rownames(vertices)


# 5. Centrality analysis of hub genes co-expression network
g<-graph_from_data_frame(edges, directed = F, vertices)
eigenvector<-eigen_centrality(g)$vector
betweenness<-betweenness(g)
degree<-degree(g)
closeness<-closeness(g)
centralities <- cbind(degree, eigenvector, betweenness, closeness)
centrality_df<-cbind(centralities, vertices$module)
#write.csv(centrality_df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/IMR90_WGCNA_hubs_netwrok_centrality_analysis.csv")
DT::datatable(centrality_df) #show centrality data of nodes for sorting
# 6. Visualize the hub genes co-expression network
edges[,"source"]<-vertices[match(edges$from, vertices$name), "id"]
edges[,"target"]<-vertices[match(edges$to, vertices$name), "id"]
edges$source<-as.numeric(edges$source)
edges$target<-as.numeric(edges$target)

edges<-data.frame(source = edges$source-1, target = edges$target-1, value = edges$value)
vertices<-cbind(vertices, centralities)
vertices[,"betweenness_sqrt"]<-sqrt(vertices$betweenness)

fn<-forceNetwork(Links = edges, Nodes = vertices,
                 Source = "source", Target = "target",
                 Nodesize = "betweenness_sqrt", fontSize = 12,
                 Value = "value", NodeID = "name",
                 Group = "group", opacity = 1,
                 legend = T, charge = -20, zoom = F,
                 opacityNoHover = 1,
                 linkWidth = JS("function(d) { return Math.cbrt(d.value)*0.4; }")
                 )
fn

5.4 Plotting the gene modules that closely co-expressed in MCC tumor samples

theme<-theme(panel.background = element_blank(),
             panel.border=element_rect(fill=NA),
             panel.grid.major = element_blank(),
             panel.grid.minor = element_blank(),
             strip.background=element_blank(),
             axis.text.x=element_text(colour="black"),
             axis.text.y=element_text(colour="black"),
             axis.ticks=element_line(colour="black"),
             plot.margin=unit(c(1,1,1,1),"line"),
             axis.title.x = element_text(color="black", size=18, face="bold"),
             axis.title.y = element_text(color="black", size=18, face="bold"),
             axis.text = element_text(size = 12, face = "bold"))

n = 25
em_m3_BP<-enrichGO(gene_df$Module_3$hgnc_symbol, 
                     OrgDb = org.Hs.eg.db, 
                     ont = "BP",
                     pAdjustMethod = "BH", 
                     pvalueCutoff = 0.05,
                     qvalueCutoff = 0.25, 
                     keyType = 'SYMBOL')
m3_BP<-em_m3_BP@result[order(em_m3_BP@result$Count, decreasing = T),][1:n,]
pm3<-ggplot(m3_BP, aes(x=Count, y=factor(Description, levels = rev(c(m3_BP$Description))), size = Count, color = p.adjust))+
      geom_point() +
      ylab("Enriched GO Terms in module 3")+
      scale_colour_gradient(low = "#F16913", high = "#FDD0A2") +
      scale_fill_gradient(low = "#F16913", high = "#FDD0A2") +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 40))+
      theme_set(theme_bw() +
      theme(legend.position = "right"))+
      theme

em_m5_BP<-enrichGO(gene_df$Module_5$hgnc_symbol, 
                     OrgDb = org.Hs.eg.db, 
                     ont = "BP",
                     pAdjustMethod = "BH", 
                     pvalueCutoff = 0.05,
                     qvalueCutoff = 0.25, 
                     keyType = 'SYMBOL')
m5_BP<-em_m5_BP@result[order(em_m5_BP@result$Count, decreasing = T),][1:n,]
pm5<-ggplot(m5_BP, aes(x=Count, y=factor(Description, levels = rev(c(m5_BP$Description))), size = Count, color = p.adjust))+
      geom_point() +
      ylab("Enriched GO Terms in module 5")+
      scale_colour_gradient(low = "#238B45", high = "#A1D99B") +
      scale_fill_gradient(low = "#238B45", high = "#A1D99B") +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 40))+
      theme_set(theme_bw() +
      theme(legend.position = "right"))+
      theme

em_m6_BP<-enrichGO(gene_df$Module_6$hgnc_symbol, 
                     OrgDb = org.Hs.eg.db, 
                     ont = "BP",
                     pAdjustMethod = "BH", 
                     pvalueCutoff = 0.05,
                     qvalueCutoff = 0.25, 
                     keyType = 'SYMBOL')
m6_BP<-em_m6_BP@result[order(em_m6_BP@result$Count, decreasing = T),][1:n,]
pm6<-ggplot(m6_BP, aes(x=Count, y=factor(Description, levels = rev(c(m6_BP$Description))), size = Count, color = p.adjust))+
      geom_point() +
      ylab("Enriched GO Terms in module 6")+
      scale_colour_gradient(low = "#74C476", high = "#E5F5E0") +
      scale_fill_gradient(low = "#74C476", high = "#E5F5E0") +
      scale_y_discrete(labels = function(x) str_wrap(x, width = 40))+
      theme_set(theme_bw() +
      theme(legend.position = "right"))+
      theme

WGCNA_module_GO_terms_plot<-ggarrange(pm3, pm5, pm6,
              ncol = 3,
              nrow = 1
) 
WGCNA_module_GO_terms_plot

cor_df<-list() for (i in 1:length(IMR90_5_period)){ file<-IMR90_5_period[i] id<-strsplit(basename(file), “[.]”)[[1]][1] id<-paste0(strsplit(id, “”)[[1]][4], ””, strsplit(id, “”)[[1]][5]) df<-readRDS(file) df<-reshape2::melt(df\(pc) df\)typeofVar1<-unlist(lapply(as.character(df\(Var1), function(x) strsplit(x, "_")[[1]][2])) df\)typeofVar2<-unlist(lapply(as.character(df\(Var2), function(x) strsplit(x,"_")[[1]][2])) colnames(df)<-c("Var1", "Var2", "Pearson correlation coefficient", "typeofVar1", "typeofVar2") df\)p.value<-reshape2::melt(readRDS(file)\(pc.pvalue)[,"value"] df<-df[df\)Var1!=df\(Var2,] df1<-df[df\)typeofVar1 == ”neural”,] df2<-df[df\(typeofVar1 =="wnt" & df\)typeofVar2 == ”wnt”, ] df<-rbind(df1, df2) df<-df[df\(p.value <=0.01,] df\)Var1<-unlist(lapply(as.character(df\(Var1), function(x) strsplit(x, "_")[[1]][1])) df\)Var2<-unlist(lapply(as.character(df$Var2), function(x) strsplit(x, ””)[[1]][1])) cor_df[[id]]<-df }

for(i in 1:length(names(cor_df))){ df<-cor_df[[i]] name<-names(cor_df)[i] #pdf(file = paste0(COR_RESULT_DIR, name,“.pdf”)) p=ggplot(df, aes(x = Pearson correlation coefficient))+ geom_histogram(color = “darkblue”, fill = “lightblue”, binwidth = 0.1)+ labs(title = paste0(“Wnt and neural genes correlation histogram -”, name), y = “Count”) + ylim(0, 8000)+ geom_vline(aes(xintercept = c(0.8)), color = “red”, linetype = “dashed”, size = 1)+ geom_vline(aes(xintercept = c(-0.8)), color = “blue”, linetype = “dashed”, size = 1) p=p+theme print(p) } ```